#' ---
#' title: "ANES 2024 Processing"
#' author: ""
#' date: "`r Sys.Date()`"
#' encoding: UTF-8
#' output:
#'   distill_article:
#'     toc: true
#'     toc_float: true
#'     toc_depth: 2
#' header-includes:
#'  - \usepackage{booktabs}
#'  - \usepackage{longtable}
#'  - \usepackage{array}
#'  - \usepackage{multirow}
#'  - \usepackage{wrapfig}
#'  - \usepackage{float}
#'  - \usepackage{colortbl}
#'  - \usepackage{pdflscape}
#'  - \usepackage{tabu}
#'  - \usepackage{threeparttable}
#'  - \usepackage{threeparttablex}
#'  - \usepackage[normalem]{ulem}
#'  - \usepackage{makecell}
#'  - \usepackage{dcolumn}
#'  - \usepackage{setspace}\doublespacing
#' ---

################################################
## ANES text analysis
################################################

## ---- spin_code, eval = FALSE, include = FALSE ---------------------------
# spin code to output Rmd
# set knit = FALSE to generate Rmd / Rnw but not compile

# rmarkdown::render(input = here::here("text_code", "anes2020_processing.R"), output_format = "pdf_document", clean = FALSE)

## see all chunks and clear all chunks
# knitr:::knit_code$get()      # check chunks in the current session
# knitr:::knit_code$restore()  # clean up the session

## ---- setup_global_options, include = FALSE -------------------
library(knitr)
# opts_chunk$set(fig.lp = "fig:", echo = FALSE, message = FALSE, warning = FALSE, error = FALSE, include = TRUE, dev = "cairo_pdf")

# Document-wide chunk defaults.
knitr::opts_chunk$set(
    # prefix used for figure labels
    fig.lp  = "fig:",
    # render every figure with both devices, at 288 dpi
    dev     = c("cairo_pdf", "png"),
    dpi     = 288,
    # keep code, messages and warnings out of the rendered output
    echo    = FALSE,
    message = FALSE,
    warning = FALSE,
    # error = FALSE makes knitr stop at the first error instead of printing it
    error   = FALSE
)


## ---- load-packages-abes2020, include = FALSE ----

source(here::here("text_code/text_packages_rep.R"))



# ANES 2024 (merged with the processed 2016 and 2020 waves)

## ---- load_data ----

# Previously processed waves; verbose = TRUE prints the names of the objects
# each .Rdata file restores.
# ANES 2016 data
load(here("text_data_output", "anes2016_processed.Rdata"), verbose = TRUE)

# ANES 2020 data (provides `a20`, used in the join below)
load(here("text_data_output", "anes2020_merged.Rdata"), verbose = TRUE)


# ANES 2024, SPSS version, which has labeled columns
anes24 <- read_sav(
    here("text_data_raw", "ANES", "anes_timeseries_2024_spss_20250808.sav")
)

dim(anes24)


# Attach the 2020 records to the 2024 respondents via the 2020 case ID.
# Every 2024 row is kept; rows without a match in `a20` get NA in its columns.
anes24_a20 <- anes24 %>%
    left_join(a20, by = "V200001")



# The four candidate like/dislike open-ends are read from one redacted
# workbook, one sheet per item (the sheet name is the ANES variable name).
anes24_openends_xlsx <- here(
    "text_data_raw", "ANES",
    "anes_timeseries_2024_redactedopenends_excel_20250923.xlsx"
)

# V241110 - PRE: What is it that R likes about Democratic Presidential candidate [text]
anes24_like_dem <- anes24_openends_xlsx %>% # like dem
    read_excel(sheet = "V241110") %>%
    clean_names() %>%
    rename(text_dem24_like = v241110_pre_what_is_it_that_r_likes_about_democratic_presidential_candidate_text)

# V241112 - PRE: What is it that R dislikes about Democratic Presidential candidate [text]
anes24_dislike_dem <- anes24_openends_xlsx %>% # dislike dem
    read_excel(sheet = "V241112") %>%
    clean_names() %>%
    rename(text_dem24_dislike = v241112_pre_what_is_it_that_r_dislikes_about_democratic_presidential_candidate_text)

# V241114 - PRE: What is it that R likes about Republican Presidential candidate [text]
anes24_like_rep <- anes24_openends_xlsx %>% # like rep
    read_excel(sheet = "V241114") %>%
    clean_names() %>%
    rename(text_rep24_like = v241114_pre_what_is_it_that_r_likes_about_republican_presidential_candidate_text)

# V241116 - PRE: What is it that R dislikes about Republican Presidential candidate [text]
anes24_dislike_rep <- anes24_openends_xlsx %>% # dislike rep
    read_excel(sheet = "V241116") %>%
    clean_names() %>%
    rename(text_rep24_dislike = v241116_pre_what_is_it_that_r_dislikes_about_republican_presidential_candidate_text)


# Fold the four open-end tables onto the survey data, one left join at a time
# and in the same order as before (like dem, dislike dem, like rep,
# dislike rep). The key is the 2024 case ID: `V240001` in the survey file,
# `v240001` in the cleaned open-end sheets.
a24_text <- Reduce(
    function(merged, openend) {
        left_join(merged, openend, by = c("V240001" = "v240001"))
    },
    list(anes24_like_dem, anes24_dislike_dem, anes24_like_rep, anes24_dislike_rep),
    anes24_a20
)


# Character count of each open-ended response, scoring non-responses as 0.
#
# Arguments:
#   x  Vector of response text. NA and the codes -1 / -9 count as
#      "no response"; the comparison uses R's usual coercion, so the strings
#      "-1" and "-9" match as well.
#
# Returns a double vector the same length as `x`.
#
# Written with base R rather than case_when(): mixing a double `0` with the
# integer result of nchar() is rejected by dplyr versions before 1.1.0.
nchar24 <- function(x) {
    # TRUE | NA is TRUE, so NA entries are always flagged and the mask itself
    # never contains NA.
    no_response <- is.na(x) | x == -1 | x == -9

    n_chars <- as.numeric(nchar(x))
    n_chars[no_response] <- 0
    n_chars
}

# Open-end text columns created when the workbook sheets were renamed.
text_vars <- c(
    "text_dem24_like",
    "text_dem24_dislike",
    "text_rep24_like",
    "text_rep24_dislike"
)

# Add one `<column>_nchar` length column per text column.
a24_text <- mutate(
    a24_text,
    across(all_of(text_vars), nchar24, .names = "{.col}_nchar")
)


#text_vars <- c("text_dem24_like_nchar", "text_dem24_dislike_nchar", "text_rep24_like_nchar", "text_rep24_dislike_nchar")



# Row/column count after the open-end text and length columns were added.
dim(a24_text)

# Sample-type variable V240003 (inspection code kept for reference):
#tabyl(a24$V240003)
#haven::print_labels(a24$V240003)
# Labels:
#     value                 label
# 1 1. Panel (2016-20-24)
# 2 2. Fresh sample (WEB)
# 3 3. Fresh sample (FTF)
# 4                4. GSS

# NOTE: the panel-only filter (V240003 == 1) is commented out, so `panel24`
# currently holds every row of `a24_text`, not just the respondents who were
# also interviewed in 2016 and 2020.
panel24 <- a24_text #%>% filter(V240003 == 1)

# Same dimensions as `a24_text` while the filter above stays disabled.
dim(panel24)


# Four-category 2024 party ID from V241221.
# Codes, per the labels used when this recode was written:
#   0 = No preference {VOL, FtF}, 1 = Democrat, 2 = Republican,
#   3 = Independent, 5 = Other party {SPECIFY}.
# Any other value (including missing) becomes NA.
panel24 <- mutate(
    panel24,
    pid24_4 = case_when(
        V241221 == 1 ~ "Dem",
        V241221 == 2 ~ "Rep",
        V241221 == 3 ~ "Ind",
        V241221 == 0 | V241221 == 5 ~ "Other",
        TRUE ~ NA_character_
    )
)


panel24 <- panel24 %>%
    mutate(
        # identify subjects who switch Party ID
        # (2016 -> 2020 versions, kept commented out for reference)
        # switcher_bin = case_when(
        #     pid3_16 == "Dem" & (pid3_20 == 2 | pid3_20 == 3 | pid3_20 == 5) ~ 1, 
        #     pid3_16 == "Rep" & (pid3_20 == 1 | pid3_20 == 3 | pid3_20 == 5) ~ 1, 
        #     pid3_16 == "Ind" & (pid3_20 == 1 | pid3_20 == 2 | pid3_20 == 5) ~ 1, 
        #     TRUE ~ 0),
        # switcher_fct = case_when(
        #     pid3_16 == "Dem" & (pid3_20 == 1) ~ "Dem16->Dem20", 
        #     pid3_16 == "Dem" & (pid3_20 == 2) ~ "Dem16->Rep20", 
        #     pid3_16 == "Dem" & (pid3_20 == 3) ~ "Dem16->Ind20", 
        #     pid3_16 == "Rep" & (pid3_20 == 2) ~ "Rep16->Rep20", 
        #     pid3_16 == "Rep" & (pid3_20 == 1) ~ "Rep16->Dem20", 
        #     pid3_16 == "Rep" & (pid3_20 == 3) ~ "Rep16->Ind20", 
        #     pid3_16 == "Ind" & (pid3_20 == 3) ~ "Ind16->Ind20", 
        #     pid3_16 == "Ind" & (pid3_20 == 1) ~ "Ind16->Dem20", 
        #     pid3_16 == "Ind" & (pid3_20 == 2) ~ "Ind16->Rep20", 
        #     TRUE ~ NA_character_),

        # 2016 -> 2024: did the respondent keep the 2016 party ID in 2024?
        # "!X24" covers every other non-missing 2024 category, "Other"
        # included. A missing ID in either year yields NA.
        switcher24_fct = forcats::fct_relevel(
            case_when(
                pid3_16 == "Dem" & pid24_4 == "Dem" ~ "Dem16->Dem24",
                pid3_16 == "Dem" & pid24_4 != "Dem" ~ "Dem16->!Dem24",
                pid3_16 == "Rep" & pid24_4 == "Rep" ~ "Rep16->Rep24",
                pid3_16 == "Rep" & pid24_4 != "Rep" ~ "Rep16->!Rep24",
                pid3_16 == "Ind" & pid24_4 == "Ind" ~ "Ind16->Ind24",
                pid3_16 == "Ind" & pid24_4 != "Ind" ~ "Ind16->!Ind24",
                TRUE ~ NA_character_
            ),
            # stable independents become the first factor level
            "Ind16->Ind24"
        )
    )



# 2020 -> 2024: did the respondent keep the 2020 party ID in 2024?
# pid3_20 is compared against numeric codes here; the labels below treat
# 1 / 2 / 3 as Dem / Rep / Ind (TODO confirm against the 2020 processing).
# Other pid3_20 values and missing IDs in either year yield NA.
panel24 <- panel24 %>%
    mutate(
        switcher2024_fct = forcats::fct_relevel(
            case_when(
                pid3_20 == 1 & pid24_4 == "Dem" ~ "Dem20->Dem24",
                pid3_20 == 1 & pid24_4 != "Dem" ~ "Dem20->!Dem24",
                pid3_20 == 2 & pid24_4 == "Rep" ~ "Rep20->Rep24",
                pid3_20 == 2 & pid24_4 != "Rep" ~ "Rep20->!Rep24",
                pid3_20 == 3 & pid24_4 == "Ind" ~ "Ind20->Ind24",
                pid3_20 == 3 & pid24_4 != "Ind" ~ "Ind20->!Ind24",
                TRUE ~ NA_character_
            ),
            # stable independents become the first factor level
            "Ind20->Ind24"
        )
    )


# Full 2016 -> 2020 -> 2024 party-ID trajectory, relative to the 2016 ID:
# at each later wave the respondent either still holds it or does not ("!").
# The four branches per 2016 party are mutually exclusive; a missing ID in
# any wave (or a 2016 ID other than Dem / Ind / Rep) yields NA.
panel24 <- panel24 %>%
    mutate(
        switcher162024_fct = forcats::fct_relevel(
            case_when(
                # Democrats in 2016
                pid3_16 == "Dem" & pid3_20 == 1 & pid24_4 == "Dem" ~ "Dem16->Dem20->Dem24",
                pid3_16 == "Dem" & pid3_20 == 1 & pid24_4 != "Dem" ~ "Dem16->Dem20->!Dem24",
                pid3_16 == "Dem" & pid3_20 != 1 & pid24_4 == "Dem" ~ "Dem16->!Dem20->Dem24",
                pid3_16 == "Dem" & pid3_20 != 1 & pid24_4 != "Dem" ~ "Dem16->!Dem20->!Dem24",

                # Independents in 2016
                pid3_16 == "Ind" & pid3_20 == 3 & pid24_4 == "Ind" ~ "Ind16->Ind20->Ind24",
                pid3_16 == "Ind" & pid3_20 == 3 & pid24_4 != "Ind" ~ "Ind16->Ind20->!Ind24",
                pid3_16 == "Ind" & pid3_20 != 3 & pid24_4 == "Ind" ~ "Ind16->!Ind20->Ind24",
                pid3_16 == "Ind" & pid3_20 != 3 & pid24_4 != "Ind" ~ "Ind16->!Ind20->!Ind24",

                # Republicans in 2016
                pid3_16 == "Rep" & pid3_20 == 2 & pid24_4 == "Rep" ~ "Rep16->Rep20->Rep24",
                pid3_16 == "Rep" & pid3_20 == 2 & pid24_4 != "Rep" ~ "Rep16->Rep20->!Rep24",
                pid3_16 == "Rep" & pid3_20 != 2 & pid24_4 == "Rep" ~ "Rep16->!Rep20->Rep24",
                pid3_16 == "Rep" & pid3_20 != 2 & pid24_4 != "Rep" ~ "Rep16->!Rep20->!Rep24",

                TRUE ~ NA_character_
            ),
            # always-independents become the first factor level
            "Ind16->Ind20->Ind24"
        )
    )


# "Ever switched" version of the 2016 -> 2020 -> 2024 trajectory: anyone who
# had already left the 2016 party ID by 2020 is put in a single "X16->!X20"
# class, whatever the 2024 ID is (it may even be missing). The branches are
# mutually exclusive, so their order does not affect the result.
panel24 <- panel24 %>%
    mutate(
        ever_switcher162024_fct = forcats::fct_relevel(
            case_when(
                # Democrats in 2016
                pid3_16 == "Dem" & pid3_20 != 1 ~ "Dem16->!Dem20",
                pid3_16 == "Dem" & pid3_20 == 1 & pid24_4 == "Dem" ~ "Dem16->Dem20->Dem24",
                pid3_16 == "Dem" & pid3_20 == 1 & pid24_4 != "Dem" ~ "Dem16->Dem20->!Dem24",

                # Independents in 2016
                pid3_16 == "Ind" & pid3_20 != 3 ~ "Ind16->!Ind20",
                pid3_16 == "Ind" & pid3_20 == 3 & pid24_4 == "Ind" ~ "Ind16->Ind20->Ind24",
                pid3_16 == "Ind" & pid3_20 == 3 & pid24_4 != "Ind" ~ "Ind16->Ind20->!Ind24",

                # Republicans in 2016
                pid3_16 == "Rep" & pid3_20 != 2 ~ "Rep16->!Rep20",
                pid3_16 == "Rep" & pid3_20 == 2 & pid24_4 == "Rep" ~ "Rep16->Rep20->Rep24",
                pid3_16 == "Rep" & pid3_20 == 2 & pid24_4 != "Rep" ~ "Rep16->Rep20->!Rep24",

                TRUE ~ NA_character_
            ),
            # always-independents become the first factor level
            "Ind16->Ind20->Ind24"
        )
    )



# 2024 race/ethnicity and its change relative to the 2016 classification.
panel24 <- panel24 %>%
    mutate(
        # Collapse V241501x to four named groups plus a catch-all.
        # NOTE(review): "Other" absorbs every remaining value, i.e. the codes
        # commented out below and any missing value, so a refusal in 2024
        # counts as a change of group further down. Confirm this is intended.
        race24_fct = case_when(
            V241501x == 1 ~ "White",
            V241501x == 2 ~ "Black",
            V241501x == 3 ~ "Hispanic",
            V241501x == 4 ~ "Asian",
            # V241501x == 5 ~ "Native",
            # V241501x == 6 ~ "Multiple",
            # V241501x <  0 ~ "Refused-DK",
            TRUE ~ "Other"
        ),

        # Group-by-group stability: same group in 2024 vs. any other 2024
        # value. Only white / black / hispanic / asian 2016 respondents are
        # classified; everyone else is NA. The reference (first) level is
        # stable whites.
        racechg24_fct = relevel(
            factor(
                case_when(
                    race16 == "white"    & race24_fct == "White"    ~ "White16->White24",
                    race16 == "white"    & race24_fct != "White"    ~ "White16->!White24",
                    race16 == "black"    & race24_fct == "Black"    ~ "Black16->Black24",
                    race16 == "black"    & race24_fct != "Black"    ~ "Black16->!Black24",
                    race16 == "hispanic" & race24_fct == "Hispanic" ~ "Hispanic16->Hispanic24",
                    race16 == "hispanic" & race24_fct != "Hispanic" ~ "Hispanic16->!Hispanic24",
                    race16 == "asian"    & race24_fct == "Asian"    ~ "Asian16->Asian24",
                    race16 == "asian"    & race24_fct != "Asian"    ~ "Asian16->!Asian24",
                    TRUE ~ NA_character_
                )
            ),
            ref = "White16->White24"
        ),

        # Same comparison with the three non-white groups pooled under "PoC".
        # NOTE(review): "PoC16->!PoC24" means "left the own 2016 group", so a
        # move between non-white groups (e.g. black -> Hispanic) lands here
        # too. Confirm that this is the intended definition.
        racechg24_fct2 = relevel(
            factor(
                case_when(
                    race16 == "white" & race24_fct == "White" ~ "White16->White24",
                    race16 == "white" & race24_fct != "White" ~ "White16->!White24",
                    (race16 == "black"    & race24_fct == "Black") |
                        (race16 == "hispanic" & race24_fct == "Hispanic") |
                        (race16 == "asian"    & race24_fct == "Asian") ~ "PoC16->PoC24",
                    (race16 == "black"    & race24_fct != "Black") |
                        (race16 == "hispanic" & race24_fct != "Hispanic") |
                        (race16 == "asian"    & race24_fct != "Asian") ~ "PoC16->!PoC24",
                    TRUE ~ NA_character_
                )
            ),
            ref = "White16->White24"
        )
    )



# Presidential vote measures, all derived from V241049.
panel24 <- mutate(
    panel24,
    # Three-way label; any other value (including missing) is NA.
    vote24 = case_when(
        V241049 == 1 ~ "Harris",
        V241049 == 2 ~ "Trump",
        V241049 == 3 ~ "No Vote",
        TRUE ~ NA_character_
    ),
    # 0/1 indicators.
    # NOTE(review): unlike `vote24`, these score every other value, missing
    # included, as 0 rather than NA. Confirm this is intended.
    vote_harris24_bin = if_else(V241049 == 1, 1, 0, missing = 0),
    vote_trump24_bin  = if_else(V241049 == 2, 1, 0, missing = 0),
    # Two-candidate version: everything except Harris / Trump is NA.
    # Despite the name this is a character column, not a factor.
    vote24_fct = case_when(
        V241049 == 1 ~ "Harris",
        V241049 == 2 ~ "Trump",
        TRUE ~ NA_character_
    )
)


# Interview mode labels for the pre-election (V240002a) and post-election
# (V240002b) waves. Values outside the listed codes become NA; only the
# post-election recode has a "video" category (code 5).
panel24 <- mutate(
    panel24,
    mode_pre24 = case_when(
        V240002a == 1 ~ "ftf",
        V240002a == 2 ~ "web",
        V240002a == 3 ~ "paper",
        V240002a == 4 ~ "tele",
        TRUE ~ NA_character_
    ),
    mode_post24 = case_when(
        V240002b == 1 ~ "ftf",
        V240002b == 2 ~ "web",
        V240002b == 3 ~ "paper",
        V240002b == 4 ~ "tele",
        V240002b == 5 ~ "video",
        TRUE ~ NA_character_
    )
)





# `a24` is the analysis copy of `panel24`. The open-end text and its
# `_nchar` length columns are already attached at this point, so no further
# join is needed. The length columns never contain NA because nchar24() maps
# missing text to 0.
a24 <- panel24 %>%
    mutate(
        # Number (0-2) of empty answers within each pair of items:
        #   lddr = like Democrat + dislike Republican
        #   lrdd = like Republican + dislike Democrat
        nonresp24_lddr = as.numeric(text_dem24_like_nchar == 0) +
            as.numeric(text_rep24_dislike_nchar == 0),
        nonresp24_lrdd = as.numeric(text_rep24_like_nchar == 0) +
            as.numeric(text_dem24_dislike_nchar == 0),
        # Difference between the two counts (lrdd minus lddr).
        nonresp24_all = nonresp24_lrdd - nonresp24_lddr,

        # 1 if at least one of the two answers in the pair is empty.
        nonresp24_lddr_bin = as.numeric(
            text_dem24_like_nchar == 0 | text_rep24_dislike_nchar == 0
        ),
        nonresp24_lrdd_bin = as.numeric(
            text_rep24_like_nchar == 0 | text_dem24_dislike_nchar == 0
        ),

        # Inverse hyperbolic sine of each response length; asinh(0) is 0, so
        # empty answers stay at zero.
        nchar24_like_dem_ihs    = asinh(text_dem24_like_nchar),
        nchar24_dislike_dem_ihs = asinh(text_dem24_dislike_nchar),
        nchar24_like_rep_ihs    = asinh(text_rep24_like_nchar),
        nchar24_dislike_rep_ihs = asinh(text_rep24_dislike_nchar)
    )


# Largest transformed response length within each pre-election interview
# mode, stored on every row of that mode as `max_<column>`. Rows with a
# missing mode form a group of their own.
a24 <- a24 %>%
    group_by(mode_pre24) %>%
    mutate(
        across(
            c(
                nchar24_like_dem_ihs,
                nchar24_dislike_dem_ihs,
                nchar24_like_rep_ihs,
                nchar24_dislike_rep_ihs
            ),
            ~ max(.x),
            .names = "max_{.col}"
        )
    ) %>%
    ungroup()


# Mode-normalised engagement scores. Each transformed length is divided by
# the maximum within its interview mode, so each term lies in [0, 1] and each
# pair sum in [0, 2].
# NOTE(review): if every answer to an item is empty within a mode, that mode's
# maximum is 0 and the ratio is 0/0 = NaN. Confirm whether that is intended.
a24 <- mutate(
    a24,
    # like Democrat + dislike Republican
    nchar24_lddr_ihs = nchar24_like_dem_ihs / max_nchar24_like_dem_ihs +
        nchar24_dislike_rep_ihs / max_nchar24_dislike_rep_ihs,
    # like Republican + dislike Democrat
    nchar24_lrdd_ihs = nchar24_like_rep_ihs / max_nchar24_like_rep_ihs +
        nchar24_dislike_dem_ihs / max_nchar24_dislike_dem_ihs,

    # Half the difference (lrdd minus lddr), which lies in [-1, 1].
    nchar24_partisan_ihs = (nchar24_lrdd_ihs - nchar24_lddr_ihs) / 2
)





## ---- impute-missingness-2024 ----
# 
# # skipping racial_resent16, ft... due to post-wave drop out
# # i.e., structural missingness
# # 
# # Subset of key variables with missingness + relevant predictors
# vars_for_impute <- c("income16", "age16", "educ16", "ideo7_16", "female16", "race16")
# 
# # Create subset for imputation
# imp_data <- anes[ , vars_for_impute]
# 
# # Run imputation with Predictive Mean Matching (PMM) — flexible + safe for non-normal data
# imp <- mice(imp_data, m = 1, method = "pmm", seed = 123)
# 
# # Get completed data
# imputed_df <- complete(imp, 1)
# 
# # Replace original variables (optional: rename with `_imp` suffix if you want to track changes)
# anes[ , vars_for_impute] <- imputed_df
# 



# Strip the haven value labels so the saved columns are plain vectors.
a24 <- haven::zap_labels(a24)

# Write the merged 2024 data under the object name `a24`.
save(
    a24,
    file = here::here("text_data_output", "anes2024_merged.Rdata")
)

